
/*************************************************
Program: Stata_04_combine_census_bea.sas
Author: hannah rubinton
Date: 1/31/2019

input: 
firm_import.dta created in stata_03
bea data created in stata_02
firm data created in stata_01 

outputs: 
firmXcountry dataset: ./output/country_imp_aff.dta
firm dataset: 


Notes: 
Create two datasets by merging together census and affiiliate data. One at the firm level and one at the firmXcountry level

**make firm X country dataset by merging together the firm import data and the bea data 
**make sample restrictions to firms that have either domestic or foreign M
**************************************************/
cd "/"  /* PROJECT ROOT FOLDER */
global output data
global data data



cap program drop combine_firm_data
program define combine_firm_data
  syntax , year(integer)


  **collapse the firm country import data to firm level
  use "$data/firm_country_import`year'.dta", clear

  *create a temporary dataset with indicator of whether a firm is an importer and number of import countries to merge into the firm level data below 
  gen importer = imp_value>0 & imp_value~=.
  gen rp_importer = rp_imp_val>0 & rp_imp_val~=.
  keep firmid importer imp_value rp_imp_val rp_importer
  collapse (sum) importer rp_importer imp_ rp_imp_val, by(firmid)
  rename importer number_import_countries
  rename rp_importer number_rp_import_countries
  replace number_import_countries=0 if missing(number_import_countries)
  replace number_rp_import_countries=0 if missing(number_rp_import_countries)
  gen importer = number_import_countries>0
  gen rp_importer = number_rp_import_countries>0

  *rescale imports
  foreach var in imp_value rp_imp_value {
    replace `var'=`var'/1000
    }

  tempfile firm_importers
  save `firm_importers'

  **collapse the firm country export data to firm level
  use "$data/firm_country_export`year'.dta", clear

  *create a temporary dataset with indicator of whether a firm is an exporter and number of export countries to merge into the firm level data below 
  gen exporter = exp_value>0 & exp_value~=.
  gen rp_exporter = rp_exp_val>0 & rp_exp_val~=.
  keep firmid exporter exp_value rp_exp_val rp_exporter
  collapse (sum) exporter rp_exporter exp_ rp_exp_val, by(firmid)
  rename exporter number_export_countries
  rename rp_exporter number_rp_export_countries
  replace number_export_countries=0 if missing(number_export_countries)
  replace number_rp_export_countries=0 if missing(number_rp_export_countries)
  gen exporter = number_export_countries>0
  gen rp_exporter = number_rp_export_countries>0

  *rescale exports
  foreach var in exp_value rp_exp_value {
    replace `var'=`var'/1000
    }

  tempfile firm_exporters
  save `firm_exporters'


  /*****************************************
  **make firm dataset by merging together the firm data and the bea firm data 
  ******************************************/
  use "$data/firm_`year'.dta", clear


  ***BEA DATASETS:
  *merge in firm level bea dataset to identify MNES and M employment abroad 
  merge 1:1 firmid using "$data/mnl_foreign_affiliates_firms`year'.dta"
    gen mnl= _merge==3
    drop if _merge==2
    drop _merge

  merge 1:1 firmid using "$data/formnl_us_affiliates_firms`year'.dta"
    drop if _merge==2
    gen formnl= _merge==3
    drop _merge

  /*
  merge m:1 firmid using "$output/wtf_final_assignment.dta"
    drop if _merge==2
    gen wtf_indicator=1 if _merge==3
    drop _merge
  */

  *merge in import data
  merge 1:1 firmid using `firm_importers'
    drop if _merge==2
    drop _merge

  *merge in export data
  merge 1:1 firmid using `firm_exporters'
    drop if _merge==2
    drop _merge

  //If firms were not in the importer or exporter dataset, then these dummy variables will be missing
  //replace with 0
  foreach var of varlist importer rp_importer exporter rp_exporter {
    replace `var' = 0 if `var'==.
  }


 

  *Bring in MNE status indicator from the merged BEA and COS data
  merge 1:1 firmid using $data/mne_status`year'.dta
    tab _m mne_status_supp, miss
    tab in_cos _merge, miss
    
      *Make a dataset with the unmatched BEA data to check
      preserve
	keep if _merge==2
	keep firmid in_bea in_cos mne_status* 
	save $data/bea_missing_firmids.dta, replace
	di  _N
      restore  
      
      
      drop if _merge==2
      drop _merge
    
  *Make firm type variable
  foreach var in sales emp {
    replace man_aff_`var'=0 if man_aff_`var'==.
    }
  gen foreign_manufacturing = (man_aff_sales>0 | man_aff_emp>0)

  *Make a firm type
  gen type_d="MAN" if emp_man>0 & (emp_prof==0 & emp_manage==0)
    replace type_d="MAN MPRO" if emp_man>0 & (emp_prof>0 | emp_manage>0)
    replace type_d="FOR M" if emp_man==0 & foreign_manuf==1
    replace type_d="MPRO" if emp_man==0 & (emp_prof>0 | emp_manage>0) & foreign_manufacturing==0
    replace type_d="OTH" if emp_man==0 & (emp_prof==0 & emp_manage==0) & foreign_manufacturing==0
    
  tab type_d, miss

  gen type="MAN" if emp_man>0 & (emp_prof==0 & emp_manage==0)
    replace type="MAN MPRO" if emp_man>0 & (emp_prof>0 | emp_manage>0)
    replace type="MPRO" if emp_man==0 & (emp_prof>0 | emp_manage>0) 
    replace type="OTH" if emp_man==0 & (emp_prof==0 & emp_manage==0) 
    
  tab type, miss


  gen imp_sales_share=imp_val/sales
  tabstat imp_value imp_sales_share, by(type) stats(sum mean p50)


  save "$data/firm_`year'_withtypes.dta", replace 
  use "$data/firm_`year'_withtypes.dta", clear


  *Collapse all the domestic activity
  replace mne_status="DOM" if mne_status==""
  replace mne_status="FOR" if mne_status=="FO2"  
  replace firmid="nonMNE" if mne_status=="DOM"
  replace mne_status="DOM IMP" if mne_status=="DOM" & importer==1
  gen firms=1
  drop naics4_primary 
  drop n4_main_sales
  //this is only collapsing nonMNE firms for which we changed firmid to "nonMNE", otherwise the dataset is already unique on firmid
  collapse (sum) emp_census-dupes  aff_sales-mnl in_sales-number_of_man_usaff firms importer number_import_countries  ///
    imp_val rp_imp* exporter number_export_countries exp_val rp_exp* foreign_manuf, by(firmid type mne_status for_mnl_country) fast
  replace number_import_countries=. if firmid=="nonMNE"
  replace number_export_countries=. if firmid=="nonMNE"
  
  *check that firmid is unique 
  duplicates tag firmid, gen(dupe_check)
  tab dupe_check
  drop dupe_check

  *bys mne_type: summ number*  aff_emp  aff_sales  if mne_type=="MNE", d

  *Summary stats of activity by type
  gen sh_us_man_emp=emp_man/(emp_man+man_aff_emp)

  *Calculate vertical and horizontal measures
  gen sh_aff_us=(aff_sales_us_unaff+aff_sales_us_rep)/aff_sales
  gen sh_us_intra=aff_sales_us_rep/(aff_sales_us_unaff+aff_sales_us_rep)
  gen sh_aff_local=aff_sales_local_unaff/aff_sales
  *gen sh_aff_platform=aff_sales   ???  NOT sure what to use here...



  *compare bea_emp at domestic affiliates of foreign firms to lbd employment 
  corr emp_lbd in_emp 
  corr emp_lbd in_emp if in_emp>0
  gen lemp_lbd = log(emp_lbd)
  gen lin_emp = log(in_emp) 
  reg lin_emp lemp_lbd 


  *compare bea reported domestic employment of U.S. MNLs to lbd employment 
  corr emp_lbd aff_emp_parent 
  corr emp_lbd aff_emp_parent if aff_emp_parent>0
  gen laff_emp_parent = log(aff_emp_parent) 
  reg laff_emp_parent lemp_lbd 


  save "$data/census_bea_firm`year'.dta", replace 

end // combine_firm_data


combine_firm_data, year(2007)
*combine_firm_data, year(2012)


